{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-14T12:48:04.450500Z",
     "start_time": "2019-01-14T12:48:03.586150Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/aerdem/projects/lofo-importance/lofo/lofo_importance.py:3: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
      "  from tqdm.autonotebook import tqdm\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.metrics import make_scorer, mean_absolute_error\n",
    "from sklearn.model_selection import KFold\n",
    "\n",
    "from data.test_data import generate_test_data, generate_unstructured_test_data\n",
    "from lofo import LOFOImportance, FLOFOImportance, Dataset, plot_importance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-14T12:48:04.513904Z",
     "start_time": "2019-01-14T12:48:04.453322Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>A</th>\n",
       "      <th>B</th>\n",
       "      <th>C</th>\n",
       "      <th>D</th>\n",
       "      <th>D2</th>\n",
       "      <th>target</th>\n",
       "      <th>binary_target</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.548814</td>\n",
       "      <td>0.592880</td>\n",
       "      <td>0.811518</td>\n",
       "      <td>0.413962</td>\n",
       "      <td>0.443227</td>\n",
       "      <td>1.486305</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.715189</td>\n",
       "      <td>0.010064</td>\n",
       "      <td>0.476084</td>\n",
       "      <td>0.629618</td>\n",
       "      <td>0.686270</td>\n",
       "      <td>0.529949</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.602763</td>\n",
       "      <td>0.475826</td>\n",
       "      <td>0.523156</td>\n",
       "      <td>0.778584</td>\n",
       "      <td>0.792326</td>\n",
       "      <td>1.434674</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.544883</td>\n",
       "      <td>0.708770</td>\n",
       "      <td>0.250521</td>\n",
       "      <td>0.851558</td>\n",
       "      <td>0.886529</td>\n",
       "      <td>1.952046</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.423655</td>\n",
       "      <td>0.043975</td>\n",
       "      <td>0.605043</td>\n",
       "      <td>0.816413</td>\n",
       "      <td>0.821734</td>\n",
       "      <td>0.480267</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          A         B         C         D        D2    target  binary_target\n",
       "0  0.548814  0.592880  0.811518  0.413962  0.443227  1.486305              1\n",
       "1  0.715189  0.010064  0.476084  0.629618  0.686270  0.529949              0\n",
       "2  0.602763  0.475826  0.523156  0.778584  0.792326  1.434674              1\n",
       "3  0.544883  0.708770  0.250521  0.851558  0.886529  1.952046              1\n",
       "4  0.423655  0.043975  0.605043  0.816413  0.821734  0.480267              0"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the 1000-row demo frame; the preview below shows its feature and target columns.\n",
    "# (generate_test_data is imported in the notebook's first cell.)\n",
    "df = generate_test_data(1000)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-14T12:48:04.689772Z",
     "start_time": "2019-01-14T12:48:04.527994Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "581710f7ae654f5387a06a2befa408e4",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "      <th>importance_mean</th>\n",
       "      <th>importance_std</th>\n",
       "      <th>val_imp_0</th>\n",
       "      <th>val_imp_1</th>\n",
       "      <th>val_imp_2</th>\n",
       "      <th>val_imp_3</th>\n",
       "      <th>val_imp_4</th>\n",
       "      <th>val_imp_5</th>\n",
       "      <th>val_imp_6</th>\n",
       "      <th>val_imp_7</th>\n",
       "      <th>val_imp_8</th>\n",
       "      <th>val_imp_9</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>0.540217</td>\n",
       "      <td>0.016008</td>\n",
       "      <td>0.532118</td>\n",
       "      <td>0.544225</td>\n",
       "      <td>0.513259</td>\n",
       "      <td>0.524307</td>\n",
       "      <td>0.525612</td>\n",
       "      <td>0.542536</td>\n",
       "      <td>0.550588</td>\n",
       "      <td>5.682745e-01</td>\n",
       "      <td>0.559869</td>\n",
       "      <td>0.541383</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>D</td>\n",
       "      <td>0.089187</td>\n",
       "      <td>0.002629</td>\n",
       "      <td>0.088832</td>\n",
       "      <td>0.086291</td>\n",
       "      <td>0.087612</td>\n",
       "      <td>0.085380</td>\n",
       "      <td>0.086004</td>\n",
       "      <td>0.090378</td>\n",
       "      <td>0.091582</td>\n",
       "      <td>9.345964e-02</td>\n",
       "      <td>0.090800</td>\n",
       "      <td>0.091527</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>A</td>\n",
       "      <td>0.088167</td>\n",
       "      <td>0.002935</td>\n",
       "      <td>0.090739</td>\n",
       "      <td>0.086158</td>\n",
       "      <td>0.085259</td>\n",
       "      <td>0.093299</td>\n",
       "      <td>0.088281</td>\n",
       "      <td>0.088402</td>\n",
       "      <td>0.083172</td>\n",
       "      <td>9.189529e-02</td>\n",
       "      <td>0.087086</td>\n",
       "      <td>0.087376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>C</td>\n",
       "      <td>0.000002</td>\n",
       "      <td>0.000033</td>\n",
       "      <td>0.000088</td>\n",
       "      <td>-0.000020</td>\n",
       "      <td>-0.000012</td>\n",
       "      <td>-0.000027</td>\n",
       "      <td>-0.000016</td>\n",
       "      <td>0.000004</td>\n",
       "      <td>0.000031</td>\n",
       "      <td>-8.312825e-07</td>\n",
       "      <td>-0.000002</td>\n",
       "      <td>-0.000021</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  feature  importance_mean  importance_std  val_imp_0  val_imp_1  val_imp_2  \\\n",
       "1       B         0.540217        0.016008   0.532118   0.544225   0.513259   \n",
       "3       D         0.089187        0.002629   0.088832   0.086291   0.087612   \n",
       "0       A         0.088167        0.002935   0.090739   0.086158   0.085259   \n",
       "2       C         0.000002        0.000033   0.000088  -0.000020  -0.000012   \n",
       "\n",
       "   val_imp_3  val_imp_4  val_imp_5  val_imp_6     val_imp_7  val_imp_8  \\\n",
       "1   0.524307   0.525612   0.542536   0.550588  5.682745e-01   0.559869   \n",
       "3   0.085380   0.086004   0.090378   0.091582  9.345964e-02   0.090800   \n",
       "0   0.093299   0.088281   0.088402   0.083172  9.189529e-02   0.087086   \n",
       "2  -0.000027  -0.000016   0.000004   0.000031 -8.312825e-07  -0.000002   \n",
       "\n",
       "   val_imp_9  \n",
       "1   0.541383  \n",
       "3   0.091527  \n",
       "0   0.087376  \n",
       "2  -0.000021  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "feature_cols = [\"A\", \"B\", \"C\", \"D\"]\n",
    "\n",
    "# Fit the regressor first; the fitted estimator is what gets passed to FLOFOImportance.\n",
    "# NOTE(review): the same frame is used for fitting and for the importance run -- confirm that is intended.\n",
    "lr = LinearRegression().fit(df[feature_cols], df[\"target\"])\n",
    "\n",
    "fi = FLOFOImportance(lr, df, feature_cols, 'target', scoring=\"neg_mean_absolute_error\")\n",
    "\n",
    "importances = fi.get_importance()\n",
    "importances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "83d88dddf5bd4965937f0ba96949fcd2",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "      <th>importance_mean</th>\n",
       "      <th>importance_std</th>\n",
       "      <th>val_imp_0</th>\n",
       "      <th>val_imp_1</th>\n",
       "      <th>val_imp_2</th>\n",
       "      <th>val_imp_3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>0.447206</td>\n",
       "      <td>0.024244</td>\n",
       "      <td>0.432768</td>\n",
       "      <td>0.418559</td>\n",
       "      <td>0.454714</td>\n",
       "      <td>0.482782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A</td>\n",
       "      <td>0.053247</td>\n",
       "      <td>0.006699</td>\n",
       "      <td>0.049021</td>\n",
       "      <td>0.044487</td>\n",
       "      <td>0.060269</td>\n",
       "      <td>0.059213</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>D</td>\n",
       "      <td>0.052560</td>\n",
       "      <td>0.003008</td>\n",
       "      <td>0.051912</td>\n",
       "      <td>0.057638</td>\n",
       "      <td>0.050646</td>\n",
       "      <td>0.050044</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C</td>\n",
       "      <td>-0.000057</td>\n",
       "      <td>0.000116</td>\n",
       "      <td>0.000119</td>\n",
       "      <td>-0.000165</td>\n",
       "      <td>-0.000023</td>\n",
       "      <td>-0.000159</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  feature  importance_mean  importance_std  val_imp_0  val_imp_1  val_imp_2  \\\n",
       "1       B         0.447206        0.024244   0.432768   0.418559   0.454714   \n",
       "3       A         0.053247        0.006699   0.049021   0.044487   0.060269   \n",
       "2       D         0.052560        0.003008   0.051912   0.057638   0.050646   \n",
       "0       C        -0.000057        0.000116   0.000119  -0.000165  -0.000023   \n",
       "\n",
       "   val_imp_3  \n",
       "1   0.482782  \n",
       "3   0.059213  \n",
       "2   0.050044  \n",
       "0  -0.000159  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Custom scorer built from mean_absolute_error with greater_is_better=False\n",
    "# (make_scorer and mean_absolute_error are imported in the notebook's first cell).\n",
    "scorer = make_scorer(mean_absolute_error, greater_is_better=False)\n",
    "\n",
    "# Shuffled 4-fold splitter with a fixed seed; `cv` and `scorer` are reused by later cells.\n",
    "cv = KFold(n_splits=4, shuffle=True, random_state=0)\n",
    "\n",
    "dataset = Dataset(df=df, target=\"target\", features=[\"A\", \"B\", \"C\", \"D\"])\n",
    "fi = LOFOImportance(dataset, scoring=scorer, model=LinearRegression(), cv=cv)\n",
    "\n",
    "importances = fi.get_importance()\n",
    "importances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-14T12:48:05.103111Z",
     "start_time": "2019-01-14T12:48:04.692682Z"
    }
   },
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "b3feb8d91fc54a699c316d64a3729bb6",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "      <th>importance_mean</th>\n",
       "      <th>importance_std</th>\n",
       "      <th>val_imp_0</th>\n",
       "      <th>val_imp_1</th>\n",
       "      <th>val_imp_2</th>\n",
       "      <th>val_imp_3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>0.414</td>\n",
       "      <td>0.025377</td>\n",
       "      <td>0.432</td>\n",
       "      <td>0.416</td>\n",
       "      <td>0.436</td>\n",
       "      <td>0.372</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>D</td>\n",
       "      <td>0.040</td>\n",
       "      <td>0.007483</td>\n",
       "      <td>0.032</td>\n",
       "      <td>0.052</td>\n",
       "      <td>0.040</td>\n",
       "      <td>0.036</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A</td>\n",
       "      <td>0.038</td>\n",
       "      <td>0.012806</td>\n",
       "      <td>0.044</td>\n",
       "      <td>0.024</td>\n",
       "      <td>0.056</td>\n",
       "      <td>0.028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C</td>\n",
       "      <td>0.016</td>\n",
       "      <td>0.007483</td>\n",
       "      <td>0.016</td>\n",
       "      <td>0.028</td>\n",
       "      <td>0.008</td>\n",
       "      <td>0.012</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  feature  importance_mean  importance_std  val_imp_0  val_imp_1  val_imp_2  \\\n",
       "1       B            0.414        0.025377      0.432      0.416      0.436   \n",
       "2       D            0.040        0.007483      0.032      0.052      0.040   \n",
       "3       A            0.038        0.012806      0.044      0.024      0.056   \n",
       "0       C            0.016        0.007483      0.016      0.028      0.008   \n",
       "\n",
       "   val_imp_3  \n",
       "1      0.372  \n",
       "2      0.036  \n",
       "3      0.028  \n",
       "0      0.012  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Classification run on the binary target, reusing the KFold splitter `cv` from an earlier cell.\n",
    "dataset = Dataset(df=df, target=\"binary_target\", features=[\"A\", \"B\", \"C\", \"D\"])\n",
    "\n",
    "# Small, seeded forest so the run is quick and repeatable.\n",
    "rf = RandomForestClassifier(n_estimators=10, max_depth=5, random_state=0)\n",
    "fi = LOFOImportance(dataset, model=rf, cv=cv, scoring='accuracy')\n",
    "\n",
    "importances = fi.get_importance()\n",
    "importances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "8c0f86bcbf3147ed8944d3da1e96aa6c",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "      <th>importance_mean</th>\n",
       "      <th>importance_std</th>\n",
       "      <th>val_imp_0</th>\n",
       "      <th>val_imp_1</th>\n",
       "      <th>val_imp_2</th>\n",
       "      <th>val_imp_3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>B</td>\n",
       "      <td>0.210579</td>\n",
       "      <td>0.006848</td>\n",
       "      <td>0.218465</td>\n",
       "      <td>0.199696</td>\n",
       "      <td>0.210972</td>\n",
       "      <td>0.213183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A</td>\n",
       "      <td>0.011146</td>\n",
       "      <td>0.006722</td>\n",
       "      <td>0.002382</td>\n",
       "      <td>0.007626</td>\n",
       "      <td>0.014447</td>\n",
       "      <td>0.020127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>D</td>\n",
       "      <td>0.008892</td>\n",
       "      <td>0.005367</td>\n",
       "      <td>0.000255</td>\n",
       "      <td>0.008582</td>\n",
       "      <td>0.012859</td>\n",
       "      <td>0.013872</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C</td>\n",
       "      <td>-0.000171</td>\n",
       "      <td>0.004295</td>\n",
       "      <td>-0.001984</td>\n",
       "      <td>-0.005786</td>\n",
       "      <td>0.001156</td>\n",
       "      <td>0.005931</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  feature  importance_mean  importance_std  val_imp_0  val_imp_1  val_imp_2  \\\n",
       "1       B         0.210579        0.006848   0.218465   0.199696   0.210972   \n",
       "3       A         0.011146        0.006722   0.002382   0.007626   0.014447   \n",
       "2       D         0.008892        0.005367   0.000255   0.008582   0.012859   \n",
       "0       C        -0.000171        0.004295  -0.001984  -0.005786   0.001156   \n",
       "\n",
       "   val_imp_3  \n",
       "1   0.213183  \n",
       "3   0.020127  \n",
       "2   0.013872  \n",
       "0   0.005931  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# NOTE: `df` is rebound here to a different, 10000-row dataset; the cells below read this frame.\n",
    "df = generate_unstructured_test_data(10000)\n",
    "dataset = Dataset(df=df, features=[\"A\", \"B\", \"C\", \"D\"], target=\"binary_target\")\n",
    "\n",
    "# No model or cv is passed, so LOFOImportance uses its own defaults for both.\n",
    "fi = LOFOImportance(dataset, scoring='roc_auc')\n",
    "\n",
    "importances = fi.get_importance()\n",
    "importances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "0bebf9dcd42d41cb862a45b1678d6807",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "      <th>importance_mean</th>\n",
       "      <th>importance_std</th>\n",
       "      <th>val_imp_0</th>\n",
       "      <th>val_imp_1</th>\n",
       "      <th>val_imp_2</th>\n",
       "      <th>val_imp_3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>E</td>\n",
       "      <td>0.535000</td>\n",
       "      <td>0.004561</td>\n",
       "      <td>0.532726</td>\n",
       "      <td>0.529386</td>\n",
       "      <td>0.541721</td>\n",
       "      <td>0.536167</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>B</td>\n",
       "      <td>0.300475</td>\n",
       "      <td>0.003054</td>\n",
       "      <td>0.299546</td>\n",
       "      <td>0.301227</td>\n",
       "      <td>0.296328</td>\n",
       "      <td>0.304798</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>D</td>\n",
       "      <td>0.047203</td>\n",
       "      <td>0.001125</td>\n",
       "      <td>0.048536</td>\n",
       "      <td>0.047944</td>\n",
       "      <td>0.046706</td>\n",
       "      <td>0.045625</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>A</td>\n",
       "      <td>0.038683</td>\n",
       "      <td>0.001377</td>\n",
       "      <td>0.038191</td>\n",
       "      <td>0.040831</td>\n",
       "      <td>0.037031</td>\n",
       "      <td>0.038679</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>C</td>\n",
       "      <td>-0.000693</td>\n",
       "      <td>0.000305</td>\n",
       "      <td>-0.000725</td>\n",
       "      <td>-0.000689</td>\n",
       "      <td>-0.000249</td>\n",
       "      <td>-0.001109</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  feature  importance_mean  importance_std  val_imp_0  val_imp_1  val_imp_2  \\\n",
       "0       E         0.535000        0.004561   0.532726   0.529386   0.541721   \n",
       "2       B         0.300475        0.003054   0.299546   0.301227   0.296328   \n",
       "4       D         0.047203        0.001125   0.048536   0.047944   0.046706   \n",
       "3       A         0.038683        0.001377   0.038191   0.040831   0.037031   \n",
       "1       C        -0.000693        0.000305  -0.000725  -0.000689  -0.000249   \n",
       "\n",
       "   val_imp_3  \n",
       "0   0.536167  \n",
       "2   0.304798  \n",
       "4   0.045625  \n",
       "3   0.038679  \n",
       "1  -0.001109  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Regression target with the extra feature \"E\"; `scorer` comes from an earlier cell.\n",
    "dataset = Dataset(df=df, features=[\"A\", \"B\", \"C\", \"D\", \"E\"], target=\"target\")\n",
    "fi = LOFOImportance(dataset, scoring=scorer, n_jobs=-1)\n",
    "\n",
    "importances = fi.get_importance()\n",
    "importances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAekAAAHSCAYAAADIczP5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAYI0lEQVR4nO3df7DddZ3f8dcnCRCWX62AFoGY4CDIEsKPm1SYsgZYRUdFC4zKug4w66BLUTvtSHHakaVOu+2uXaxoBWe74DK4OtphxtrW+mPJLGgVLjboCBiEDZhqazarIbIJmx/v/pFrDCE/Tticcz733sdj5g7n1z3f9/3cy33me873nNuqKgBAf+aMewAAYPdEGgA6JdIA0CmRBoBOiTQAdEqkAaBT88Y9wK6OOeaYWrhw4bjHAICRePDBB/+qqo7d3XXdRXrhwoWZnJwc9xgAMBKttSf3dJ2HuwGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0CmRBoBOiTQAdEqkAaBTIg0AnRJpAOiUSANAp0QaADol0gDQKZEGgE6JNAB0SqQBoFMiDQCdEmkA6JRIA0Cn5o17gFFoN7VxjwDANFY31li2a08aADol0gDQKZEGgE6JNAB0SqQBoFMiDQCdEmkA6JRIA0CnRBoAOiXSANApkQaATok0AHRKpAGgUyINAJ0SaQDolEgDQKdEGgA6NW/YG2itbU3yvZ0u+mxV/bthbxcApruhRzrJxqo6cwTbAYAZZRSRBoDp5fbnnl1+z/Idp1esWDGyMUbxnPShrbWVO328bdcbtNauaa1NttYm165dO4KRAKB/raqGu4HWflFVhw96+4mJiZqcnDywM9zUDuj9ATC71I3Da2Vr7cGqmtjddY7uBoBOiTQAdGoUB44d2lpbudP5L1fVDSPYLgBMa0OPdFXNHfY2AGAm8nA3AHRKpAGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0CmRBoBOiTQAdEqkAaBTIg0AnRJpAOiUSANAp0QaADol0gDQKZEGgE7NG/cAo1A31rhHAID9Zk8aADol0gDQKZEGgE6JNAB0SqQBoFMiDQCdEmkA6JRIA0CnRBoAOiXSANApkQaATok0AHRKpAGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0CmRBoBOiTQAdEqkAaBTIg0AnRJpAOiUSANAp0QaADol0gDQKZEGgE6JNAB0SqQBoFMiDQCdEmkA6JRIA0CnRBoAOiXSANApkQaATok0AHRKpAGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0CmRBoBOzRv3AKPQbmrjHgFmvbqxxj0CTDv2pAGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0CmRBoBOiTQAdEqkAaBTIg0AnRJpAOiUSANAp0QaADol0gDQKZEGgE6JNAB0SqQBoFNDjXRrbWtrbWVr7aHW2ndaa+cNc3sAMJPMG/L9b6yqM5OktXZxkt9P8uohbxMAZoRhR3pnRyb52Qi3B4zb7b86ufye5UmSFStWjGUUmI6GHelDW2srk8xPclySC3d3o9baNUmuSZIFCxYMeSQAmB5aVQ3vzlv7RVUdPnX63CR/nOT02stGJyYmanJy8sDOcVM7oPcH7L+6cXi/a2A6a609WFUTu7tuZEd3V9X/SnJMkmNHtU0AmM5GFunW2qlJ5iZZN6ptAsB0NqrnpJOkJbmyqrYOeZsAMCMMNdJVNXeY9w8AM5l3HAOATok0AHRKpAGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0CmRBoBOiTQAdEqkAaBTIg0AnRJpAOiUSANAp0QaADol0gDQqXnjHmAU6sYa9wgAsN/sSQNAp0QaADol0gDQKZEGgE6JNAB0SqQBoFMiDQCdEmkA6JRIA0CnRBoAOiXSANApkQaATok0AHRKpAGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0CmRBoBOiTQA
dEqkAaBTIg0AnRJpAOiUSANAp0QaADol0gDQKZEGgE6JNAB0SqQBoFMiDQCdEmkA6JRIA0CnRBoAOiXSANApkQaATok0AHRKpAGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0Kl54x5gFNpNbdwj7FA31rhHAGCasCcNAJ0SaQDolEgDQKdEGgA6JdIA0Kl9Rrpt99uttQ9NnV/QWls2/NEAYHYbZE/6PyU5N8kVU+c3JPnE0CYCAJIM9jrpf1hVZ7fW/neSVNXPWmsHD3kuAJj1BtmT3txam5ukkqS1dmySbUOdCgAYKNIfS3J3khe31v5NkvuS/NuhTgUA7P3h7tbanCR/meT6JBclaUneUlWPjGA2AJjV9hrpqtrWWvtEVZ2V5NERzQQAZLCHu7/eWrustdbPX6kAgFlgkEi/O8nnkzzbWnu6tbahtfb0kOcCgFlvny/BqqojRjEIAPBc+4x0a+03dnd5Vf3FgR8HAPilQd7M5AM7nZ6fZFmSB5NcOJSJAIAkgz3c/aadz7fWTkzy0X19Xmtta5LvJTkoyZYkf5rk5qryRigAMIBB9qR3tSbJKwe43caqOjNJWmsvTvKZJEcmufEFbBMAZp1BnpO+JVNvCZrtR4OfmeQ7+7ORqvppa+2aJA+01n6vqmqfnzRDLV++PCtWrBj3GABMA4PsSU/udHpLkj+rqm/s74aq6omp9wB/cZL/t/N1UwG/JkkWLFiwv3cNADPSIJH+e1X1H3e+oLX2/l0v+7uoqk8l+VSSTExMzOi9bHvRAAxqkDczuXI3l121vxtqrZ2UZGuSn+7v5wLAbLTHPenW2hVJfivJotbaF3e66ogkf70/G5n685a3Jvn4bH4+GgD2x94e7v5mkp8kOSbJf9jp8g1JvjvAfR/aWluZX70E684kf/QC5wSAWWePka6qJ5M8meTcF3LHVTX3hQ4FAAzwnHRr7VWttQdaa79orf1ta22rP7ABAMM3yIFjH09yRZLHkhya5F1JPjHMoQCAwSKdqvphkrlVtbWqbk/yuuGOBQAM8jrpv2mtHZxkZWvtD7L9YLKB4g4AvHCDxPadU7e7LskzSU5MctkwhwIABvsrWE+21g5NclxV3TSCmQCADHZ095uSrEzy5anzZ+7y5iYAwBAM8nD37yVZluTnSVJVK5MsGuJMAEAGi/Tmqlq/y2Xe2hMAhmyQo7u/31r7rSRzW2snJ3lftr9lKAAwRHvck26t3Tl18vEkv57k2SR/luTpJP90+KMBwOy2tz3pc1prL03ytiQX5Ll/ZOPXkmwa5mAAMNvtLdK3Jvl6kpOSTO50ecv256RPGuJcADDr7fHh7qr6WFW9MsmfVNVJO30sqiqBBoAh2+fR3VX1u6MYBAB4Lu/BDQCdGuQlWNNe3ehl3QBMP/akAaBTIg0AnRJpAOiUSANAp0QaADol0gDQKZEGgE6JNAB0SqQBoFMiDQCdEmkA6JRIA0CnRBoAOiXSANApkQaATok0AHRKpAGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0CmRBoBOiTQAdEqkAaBTIg0AnRJpAOiUSANAp0QaADol0gDQKZEGgE6JNAB0SqQBoFMiDQCdEmkA6JRIA0CnRBoAOiXSANApkQaATok0AHRKpAGgUyINAJ0SaQDo1LxxDzAK7aY28m3WjTXybQIws9iTBoBOiTQAdEqkAaBTIg0AnRJpAOiUSANAp0QaADol0gDQKZEGgE6JNAB0SqQBoFMiDQCdEmkA6JRIA0CnRBoAOiXSANApkQaATg090q21t7TWqrV26rC3BQAzySj2pK9Ict/UfwGAAQ010q21w5P8oyS/k+Ttw9xWV25Pli9fPu4pAJjmhr0n/eYkX66qVUnWtdbO2d2NWmvXtNYmW2uTa9euHfJIADA9DDvSVyT57NTpz2YPD3lX1aeqaqKqJo499tghjzQCVycrVqwY9xQATHPzhnXHrbUXJbkwyeLWWiWZm6Raax+oqhrWdgFg
phjmnvTlSe6sqpdV1cKqOjHJXyY5f4jbBIAZY5iRviLJ3btc9l/iKG8AGMjQHu6uqgt2c9nHhrU9AJhpvOMYAHRKpAGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0CmRBoBOiTQAdEqkAaBTIg0AnRJpAOiUSANAp0QaADol0gDQKZEGgE7NG/cAo1A31rhHAID9Zk8aADol0gDQKZEGgE6JNAB0SqQBoFMiDQCdEmkA6JRIA0CnRBoAOiXSANApkQaATok0AHRKpAGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0CmRBoBOiTQAdEqkAaBTIg0AnRJpAOiUSANAp0QaADol0gDQKZEGgE6JNAB0SqQBoFMiDQCdEmkA6JRIA0CnRBoAOiXSANApkQaATok0AHRKpAGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0CmRBoBOzfxItzbuCQDgBZn5kQaAaUqkAaBTIg0AnRJpAOiUSANAp0QaADol0gDQKZEGgE6JNAB0SqQBoFMiDQCdEmkA6JRIA0CnRBoAOiXSANApkQaATok0AHRq6JFurf2D1tpnW2uPt9YebK3999baK4a9XQCY7uYN885bay3J3Uk+XVVvn7psSZKXJFk1zG0DwHQ37D3pC5Jsrqpbf3lBVT1UVfcOebs7LB/VhgDgABt2pE9P8uC+btRau6a1Ntlam1y7du2QRwKA6aGLA8eq6lNVNVFVE8cee+wBve8VB/TeAGB0hh3p7yc5Z8jbAIAZadiR/vMkh7TWrvnlBa21M1pr5w95uwAw7Q010lVVSf5xkt+cegnW95P8fpL/O8ztAsBMMNSXYCVJVf04yVuHvR0AmGm6OHAMAHg+kQaATok0AHRKpAGgUyINAJ0SaQDolEgDQKdEGgA6JdIA0CmRBoBOiTQAdEqkAaBTIg0AnRJpAOjU0P9UJQDPt3nz5qxZsyabNm0a9yiMyPz583PCCSfkoIMOGvhzRBpgDNasWZMjjjgiCxcuTGtt3OMwZFWVdevWZc2aNVm0aNHAnzfzH+6uGvcEAM+zadOmHH300QI9S7TWcvTRR+/3IyczP9IAnRLo2eWFfL9FGgA6JdIAPWjtwH4M4LzzzhvyF/Vcq1evzmc+85mRbnO6E2mAWeqb3/zmyLa1ZcsWkX4BRBpgljr88MOTJCtWrMirX/3qvPnNb85JJ52UG264IXfddVeWLVuWxYsX5/HHH0+SXHXVVXnPe96TiYmJvOIVr8iXvvSlJNsPgrv66quzePHinHXWWbnnnnuSJHfccUcuueSSXHjhhbnoootyww035N57782ZZ56Zm2++OatXr87555+fs88+O2efffaOfzSsWLEiy5cvz+WXX55TTz0173jHO1JTBwE/8MADOe+887JkyZIsW7YsGzZsyNatW/OBD3wgS5cuzRlnnJHbbrttj1/zoF/r2rVrc9lll2Xp0qVZunRpvvGNbyRJ7r///px77rk566yzct555+UHP/jBjq/10ksvzete97qcfPLJuf766w/MN6mquvo455xzCmCme/jhh597wfbXohy4jwEcdthhVVV1zz331FFHHVU//vGPa9OmTfXSl760PvShD1VV1Uc/+tF6//vfX1VVV155ZV188cW1devWWrVqVR1//PG1cePG+shHPlJXX311VVU98sgjdeKJJ9bGjRvr9ttvr+OPP77WrVu3YztveMMbdmz/mWeeqY0bN1ZV1apVq+qXv//vueeeOvLII+tHP/pRbd26tV71qlfVvffeW88++2wtWrSo7r///qqqWr9+fW3evLluu+22+vCHP1xVVZs2bapzzjmnnnjiid1+zYN+rVdccUXde++9VVX15JNP1qmnnvqcbVZVffWrX61LL720qqpuv/32WrRoUf385z+vjRs31oIFC+qpp5563vaf932vqiSTtYcmep00AFm6dGmOO+64JMnLX/7yvPa1r02SLF68eMeecZK89a1vzZw5c3LyySfnpJNO
yqOPPpr77rsv733ve5Mkp556al72spdl1apVSZLXvOY1edGLXrTbbW7evDnXXXddVq5cmblz5+74nCRZtmxZTjjhhCTJmWeemdWrV+eoo47Kcccdl6VLlyZJjjzyyCTJV77ylXz3u9/NF77whSTJ+vXr89hjj+3x9ciDfK1f+9rX8vDDD+/4nKeffjq/+MUvsn79+lx55ZV57LHH0lrL5s2bd9zmoosuylFHHZUkOe200/Lkk0/mxBNP3Nuy75NIA5BDDjlkx+k5c+bsOD9nzpxs2bJlx3W7voxoXy8rOuyww/Z43c0335yXvOQleeihh7Jt27bMnz9/t/PMnTv3OTPsqqpyyy235OKLL97rLLu77z19rdu2bcu3vvWt58yUJNddd10uuOCC3H333Vm9enWWL1/+gmYelOekARjY5z//+Wzbti2PP/54nnjiiZxyyik5//zzc9dddyVJVq1alaeeeiqnnHLK8z73iCOOyIYNG3acX79+fY477rjMmTMnd955Z7Zu3brXbZ9yyin5yU9+kgceeCBJsmHDhmzZsiUXX3xxPvnJT+7Yq121alWeeeaZv9PX+drXvja33HLLjvMrV67cMfPxxx+fZPvz0MMm0gA9ONDPSg/JggULsmzZsrz+9a/Prbfemvnz5+faa6/Ntm3bsnjx4rztbW/LHXfc8Zy9yl8644wzMnfu3CxZsiQ333xzrr322nz605/OkiVL8uijj+51rztJDj744Hzuc5/Le9/73ixZsiSvec1rsmnTprzrXe/KaaedlrPPPjunn3563v3ud/+d92I/9rGPZXJyMmeccUZOO+203HrrrUmS66+/Ph/84Adz1llnHZA95X1p1dnbZk5MTNTk5OS4xwAYqkceeSSvfOUrxz3Gfrnqqqvyxje+MZdffvm4R5m2dvd9b609WFUTu7u9PWkA6JQDxwAYyCiegz1Qvve97+Wd73zncy475JBD8u1vf3tME70wIg3AjLN48eIdB3tNZx7uBhiT3o4JYrheyPdbpAHGYP78+Vm3bp1QzxJVlXXr1j3vddf74uFugDE44YQTsmbNmqxdu3bcozAi8+fP3/EuaoMSaYAxOOigg/b4tpXwSx7uBoBOiTQAdEqkAaBT3b0taGttbZInD/DdHpPkrw7wfc5U1mpw1mpw1mpw1mpwM2WtXlZVx+7uiu4iPQyttck9vS8qz2WtBmetBmetBmetBjcb1srD3QDQKZEGgE7Nlkh/atwDTCPWanDWanDWanDWanAzfq1mxXPSADAdzZY9aQCYdmZUpFtrr2ut/aC19sPW2g27uf6Q1trnpq7/dmtt4ein7MMAa/UbrbXvtNa2tNYuH8eMvRhgrf5Za+3h1tp3W2tfb629bBxz9mCAtXpPa+17rbWVrbX7WmunjWPOHuxrrXa63WWttWqtzeijmPdmgJ+rq1pra6d+rla21t41jjmHoqpmxEeSuUkeT3JSkoOTPJTktF1uc22SW6dOvz3J58Y9d8drtTDJGUn+NMnl456587W6IMmvTZ3+XT9Xe12rI3c6fUmSL4977l7Xaup2RyT5iyTfSjIx7rl7XaskVyX5+LhnHcbHTNqTXpbkh1X1RFX9bZLPJnnzLrd5c5JPT53+QpKLWmtthDP2Yp9rVVWrq+q7SbaNY8CODLJW91TV30yd/VaS/fszNzPHIGv19E5nD0syWw+KGeT3VZJ8OMm/T7JplMN1ZtC1mpFmUqSPT/Kjnc6vmbpst7epqi1J1ic5eiTT9WWQtWK7/V2r30nyP4Y6Ub8GWqvW2j9prT2e5A+SvG9Es/Vmn2vVWjs7yYlV9d9GOViHBv1/8LKpp5y+0Fo7cTSjDd9MijSMVWvtt5NMJPnDcc/Ss6r6RFW9PMm/SPKvxj1Pj1prc5L8UZJ/Pu5Zpon/mmRhVZ2R5Kv51SOm095MivT/SbLzv55OmLpst7dprc1LclSSdSOZri+DrBXbDbRWrbXfTPIvk1xSVc+OaLbe
7O/P1WeTvGWoE/VrX2t1RJLTk6xora1O8qokX5ylB4/t8+eqqtbt9P/dHyc5Z0SzDd1MivQDSU5urS1qrR2c7QeGfXGX23wxyZVTpy9P8uc1ddTBLDPIWrHdPteqtXZWktuyPdA/HcOMvRhkrU7e6ewbkjw2wvl6ste1qqr1VXVMVS2sqoXZfqzDJVU1OZ5xx2qQn6vjdjp7SZJHRjjfUM0b9wAHSlVtaa1dl+R/ZvvRgH9SVd9vrf3rJJNV9cUk/znJna21Hyb562z/Zs86g6xVa21pkruT/P0kb2qt3VRVvz7GscdiwJ+rP0xyeJLPTx2H+FRVXTK2ocdkwLW6bupRh81JfpZf/aN5VhlwrcjAa/W+1tolSbZk++/2q8Y28AHmHccAoFMz6eFuAJhRRBoAOiXSANApkQaATok0AHRKpAGgUyINAJ0SaQDo1P8HOXhARHlxcfMAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 576x576 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "plot_importance(importances)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Automatically grouped features by correlation:\n",
      "1 ['D', 'D2']\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "1b33b416ef064bb7acd791d8345ae7fd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfcAAAHSCAYAAADxFIKiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAS20lEQVR4nO3cf4zkd33f8dcbn5NcDVhtTK5Rod4EUZGUwxC2PxWaJUU0kVOTKCgCpZWpQKe2IfyRqOKkVkW0f2BKE6khbekpaXCIKqOmQXLjhEKTjKKI0PYMNifqlmKw27RqIQ5yOefa2O6nf+w6Xm/2fN899vb7nfc+HpJ1M7Pf/c773prz82Z2bmqMEQCgj+fMPQAAcLjEHQCaEXcAaEbcAaAZcQeAZsQdAJo5MfcAh+Wmm24aGxsbh3rOxx57LDfccMOhnrMje5rOrqaxp2nsabquu7r33nt/e4zxgr23t4n7xsZGzp8/f6jnXK1W2draOtRzdmRP09nVNPY0jT1N13VXVfXwfrd7WR4AmhF3AGhG3AGgGXEHgGbEHQCaEXcAaEbcAaAZcQeAZsQdAJoRdwBoRtwBoBlxB4BmxB0AmhF3AGhG3AGgGXEHgGbEHQCaEXcAaEbcAaAZcQeAZk7MPQAArKtb3vXRPHrp8STJ877lbL7ywB2//7UbT16f+9/5ulnmEncAuEqPXno8D91xa5Lk9J1nf/9ykmycvWeusbwsDwDdiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Az4g4AzYg7AFylh9/zPZOPraprOMkziTsANCPuANCMuANAM+IOAM2IOwA0I+4A0MyJuQe4nKp6MsmFXTfdNca4Y655AGBdLDbuSS6NMV4x9xAAsG68LA8AzSz5mfvJqrpv1/V3jzE+tPuAqjqT5EySnDp1KqvV6lAHuHjx4qGfsyN7ms6uprGnaexpumu5q93n3XsfG2fvueyx11KNMY7kjg6qqi6OMZ479fjNzc1x/vz5Q51htVpla2vrUM/ZkT1NZ1fT2NM09jTdtdpVVeWpjp6+83Qu3P70W8U2zt6Th+64dd9jD/H+7x1jbO693cvyANCMuANAM+v0M/ePjDHOzjYNAKyJxcZ9jHHd3DMAwDrysjwANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A8BVuvkdvzj52KP8uHdxB4BmxB0AmhF3AGhG3AGgGXEHgGbEHQCaEXcAaEbcAaAZcQeAZk7MPQAArLONs/ckSZ73LU9fTpIbT14/10jiDgBX66E7bt117dbLHnfUvCwPAM2IOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7gDQjLgDQDPiDgDNiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7gDQjLgDQDPiDgDNiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7gDQjLgDQDPiDgDNiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Az4g4AzYg7ADRzYu4BAPb6oV95LM95+HS+8sAdufHk9bn/na+beyRYK565A4vz2OPbvz50x6159NLj8w4Da0jcAaAZcQeAZsQdAJoRdwBoRtwBoBlxB4BmxB0AmhF3AGhG3AGgGXEHFqWqJt0GXJ64A0Az4g4AzYg7ADQj7gDQjLgDQDPiDgDNLDLuVfVkVd1XVfdX1Ser6s/PPRMArIsTcw9wGZfGGK9Ikqr6S0neneQ75h0JANbDIp+57/H8JF+eewgAWBdLfeZ+sqruS/J1Sb4xyXfud1BVnUlyJklOnTqV1Wp1qENcvHjx0M/ZkT1NZ1cHs3tX9vYHeTxNd9x2tdS4735Z/s8l+dmqetkYY+w+aIxxLsm5JNnc3BxbW1uHOsRqtcphn7Mje5rOrg5ma2sr+cg9T1/mGTyepjtuu1r8y/JjjN9M
clOSF8w9CwCsg8XHvapemuS6JI/MPQsArIOlviz/1M/ck6SS3D7GeHLOgQBgXSwy7mOM6+aeAQDW1eJflgcADkbcAaAZcQeAZsQdAJoRdwBoRtwBoBlxBxZlz6dMX/Y24PLEHQCaEXcAaEbcAaAZcQeAZsQdAJoRdwBoRtwBoBlxB4BmxB0AmhF3YLE2zt6TG09eP/cYsHZOzD0AwF4f+K4bsrV1Ye4xYG155g4AzYg7ADQj7gDQjLgDQDPiDgDNiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7gDQjLgDQDPiDgDNiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7gDQjLgDQDPiDgDNiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7gDQjLgDQDPiDgDNiDsANCPua+L0nadzy7s+OvcYAKwBcV8jj156fO4RAFgD4g4AzYg7ADQj7gDQjLgDQDPiDgDNiDsANCPuANCMuANAM+K+BqoqSfLwe75n5kkAWAfiDgDNiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0MwV415VT1bVfVX1maq6v6p+tKr2/b6qeklV/fuq+nRV/dtnOedGVV2qqk9V1QM73/PmXV//wZ1zXKiqj1fVLVf1uwOAY+jEhGMujTFekSRV9Q1J/kWS5yd55z7Hnk3yT8cYP1NV33SF8z44xnjlznm/OckvVFWNMX4myReSfMcY48tV9d1JziX5M9N+SwBwvB3oZfkxxheTnEnytnrqY9Oe6feSvHDn2C8c4LyfT/IjSd6+c/3jY4wv73z5E0+dEwC4sinP3J9hjPH5qrouyTck+V97vvxgkndU1afGGL94wFN/MslL97n9LUl+eb9vqKoz2f7LRk6dOpXVanXAu3x2Fy9ePPRzfrWWNk+yzD0tlV1NY0/T2NN0x21XB4775VTVtyV5XZJXJvlYVf1Okt/MdvBfPMYYVzrFPud8Tbbj/u37fcMY41y2X7LP5ubm2Nrauur597NarXLY5/xqLW2eZJl7Wiq7msaeprGn6Y7brg4c952fjz+Z5It7vvTaJB8fY/xWVX1fkruTvD/JL00Ie7L9l4IHdt3Py5P8VJLvHmM8ctA5AeC4OtDP3KvqBdkO9k/uE+xPJXl9Vd04xvhPSd6b5MeS/NyE824k+YdJ3rdz/Y8n+YUkf3WM8dmDzAgAx92UZ+4nq+q+JNcneSLJB5P8+N6Dxhgfq6qfS/KJqvrdbL/j/a8l+UBVvXqM8aU93/LiqvpUkq9L8pUkPzHG+MDO1/5ukq9P8k923rf3xBhj88C/OwA4hq4Y9zHGdVNPNsb4sWw/W9/tA/sc91CSk89ynrcmeevU+wUAnuYT6gCgGXEHgGbEHQCaEXcAaEbcAaAZcQeAZsR9DTz1eUE3v+OgH9cPwHEk7gDQjLgDQDPiDgDNiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4r5EbT14/9wgArIETcw/ANBduvzD3CACsCc/cAaAZcQeAZsQdAJoRdwBoRtwBoBlxB4BmxB0AmhF3AGhG3AGgGXEHgGbEHQCaEXcAaEbcAaAZcQeAZsQdAJoRdwBoRtwBoBlxB4BmxB0AmhF3AGhG3AGgGXEHgGbEHQCaEXcAaEbcAaAZcQeAZsQdAJoRdwBoRtwBoBlxB4BmxB0AmhF3AGhG3AGgGXEHgGbEHQCaEXcAaEbcAaAZcQeAZsQdAJoRdwBoRtwBoBlxB4BmxH1hTt95Ore866NzjwHAGhP3BXr00uNzjwDAGhN3AGhG3AGgGXEHgGbEHQCaEXcAaEbcAaAZcQeAZsQdAJoR9wWpqn0vA8BBiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Azi417VX1vVY2qeuncswDAOlls3JO8Kclv7PwKAEy0yLhX1XOTfHuStyR548zjAMBa
OTH3AJfx+iQfGWN8tqoeqapXjTHu3XtQVZ1JciZJTp06ldVqdahDXLx48dDPeRBz3vdBzL2ndWJX09jTNPY03XHb1VLj/qYk/2jn8l071/9A3McY55KcS5LNzc2xtbV1qEOsVqsc9jkPYs77Poi597RO7Goae5rGnqY7brtaXNyr6o8k+c4kp6tqJLkuyaiqvzXGGPNOBwDLt8Sfub8hyQfHGDePMTbGGC9K8oUkr555LgBYC0uM+5uSfHjPbf8q3jUPAJMs7mX5McZr9rntJ+aYBQDW0RKfuQMAXwVxB4BmxB0AmhF3AGhG3AGgGXEHgGbEfUF2fwCfD+MD4GqJOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7gDQjLgDQDPiDgDNiDsANCPuC3TjyevnHgGANXZi7gF4pgu3X5h7BADWnGfuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7gDQjLgDQDPiDgDNiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7gDQjLgDQDPiDgDNiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7gDQjLgDQDPiDgDNiDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Az4v4sfvjhH557BAA4MHEHgGbEHQCaEXcAaEbcAaAZcQeAZsQdAJoRdwBoRtwBoBlxB4BmxP0yqmruEQDgqog7ADQj7gDQjLgDQDPiDgDNiDsANCPuANCMuANAM4uNe1X90aq6q6oerKp7q+qXqupPzD0XACzdibkH2E9tf4LMh5PcOcZ4485ttyQ5leSzc84GAEu3yLgneU2Sx8cY73/qhjHG/TPOAwBrY6lxf1mSe690UFWdSXImSU6dOpXVanXog1yLc3Zz8eJFe5rIrqaxp2nsabrjtqulxn2SMca5JOeSZHNzc2xtbR36fVyLc3azWq3saSK7msaeprGn6Y7brpb6hrrPJHnV3EMAwDpaatx/NcnX7rzsniSpqpdX1atnnAkA1sIi4z7GGEm+L8lrd/4p3GeSvDvJ/5x3MgBYvsX+zH2M8T+S/MDccwDAulnkM3cA4OqJOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7pex/U/tAWD9iDsANCPuANCMuANAM+IOAM2IOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7gDQjLg/i/fd/L65RwCAAxN3AGhG3AGgGXEHgGbEHQCaEXcAaEbcAaAZcQeAZsQdAJoRdwBoRtwBoBlxB4BmxB0AmhF3AGhG3AGgGXEHgGbEHQCaEXcAaEbcAaAZcQeAZsQdAJqpMcbcMxyKqvpSkocP+bQ3JfntQz5nR/Y0nV1NY0/T2NN0XXd18xjjBXtvbBP3a6Gqzo8xNueeY+nsaTq7msaeprGn6Y7brrwsDwDNiDsANCPuz+7c3AOsCXuazq6msadp7Gm6Y7UrP3MHgGY8cweAZsQ9SVV9V1X956r6XFWd3efrX1tVH9r5+r+rqo2jn3J+E/b0F6rqk1X1RFW9YY4Zl2DCnn6kqv5jVX26qn6lqm6eY84lmLCrv15VF6rqvqr6jar61jnmnNuV9rTruO+vqlFVx+Zd4btNeDy9uaq+tPN4uq+q3jrHnEdijHGs/0tyXZIHk3xzkq9Jcn+Sb91zzN9M8v6dy29M8qG5517onjaSvDzJzyZ5w9wzL3hPr0nyh3Yu/43j+Hg6wK6ev+vybUk+MvfcS9zTznHPS/LrST6RZHPuuZe4pyRvTvKTc896FP955p786SSfG2N8fozxe0nuSvL6Pce8PsmdO5d/PslfrKo6whmX4Ip7GmM8NMb4dJL/N8eACzFlT782xvjdnaufSPLCI55xKabs6n/vunpDkuP4JqEp/49Kkr+f5D1J/s9RDrcgU/d0LIh78seS/Ldd139r57Z9jxljPJHk0SRffyTTLceUPXHwPb0lyS9f04mWa9KuquqH
qurBJP8gyduPaLYlueKequrbkrxojHHPUQ62MFP/7H3/zo/Efr6qXnQ0ox09cYeZVNVfSbKZ5L1zz7JkY4x/PMZ4cZJ3JPk7c8+zNFX1nCQ/nuRH555lDfzrJBtjjJcn+ViefkW2HXFP/nuS3X97e+HObfseU1UnktyY5JEjmW45puyJiXuqqtcm+dtJbhtj/N8jmm1pDvqYuivJ917TiZbpSnt6XpKXJVlV1UNJ/mySu4/hm+qu+HgaYzyy68/bTyV51RHNduTEPfkPSV5SVd9UVV+T7TfM3b3nmLuT3L5z+Q1JfnXsvDvjGJmyJybsqapemeSfZTvsX5xhxqWYsquX7Lp6a5L/coTzLcWz7mmM8egY46YxxsYYYyPb7+O4bYxxfp5xZzPl8fSNu67eluSBI5zvSJ2Ye4C5jTGeqKq3Jfk32X635T8fY3ymqv5ekvNjjLuT/HSSD1bV55L8TrYfNMfKlD1V1Z9K8uEkfzjJX66qd40x/uSMYx+5iY+n9yZ5bpJ/ufO+zP86xrhttqFnMnFXb9t5lePxJF/O03/JPjYm7unYm7int1fVbUmeyPb/y98828DXmE+oA4BmvCwPAM2IOwA0I+4A0Iy4A0Az4g4AzYg7ADQj7gDQjLgDQDP/HwaXyDutlQJMAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 576x576 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "dataset = Dataset(df=df, target=\"target\", features=[\"A\", \"B\", \"C\", \"D\", \"D2\", \"E\"], \n",
    "                  auto_group_threshold=0.7)\n",
    "fi = LOFOImportance(dataset, scorer, n_jobs=-1)\n",
    "\n",
    "importances = fi.get_importance()\n",
    "importances\n",
    "\n",
    "\n",
    "plot_importance(importances, kind=\"box\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/aerdem/projects/lofo-importance/lofo/lofo_importance.py:45: UserWarning: Warning: If your model is multithreaded, please initialise the numberof jobs of LOFO to be equal to 1, otherwise you may experience performance issues.\n",
      "  warnings.warn(warning_str)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>feature</th>\n",
       "      <th>importance_mean</th>\n",
       "      <th>importance_std</th>\n",
       "      <th>val_imp_0</th>\n",
       "      <th>val_imp_1</th>\n",
       "      <th>val_imp_2</th>\n",
       "      <th>val_imp_3</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>names</td>\n",
       "      <td>0.232698</td>\n",
       "      <td>0.025097</td>\n",
       "      <td>2.480101e-01</td>\n",
       "      <td>0.193688</td>\n",
       "      <td>0.229018</td>\n",
       "      <td>0.260076</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>B</td>\n",
       "      <td>0.002641</td>\n",
       "      <td>0.001413</td>\n",
       "      <td>4.608295e-03</td>\n",
       "      <td>0.003212</td>\n",
       "      <td>0.000838</td>\n",
       "      <td>0.001906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>A</td>\n",
       "      <td>0.000484</td>\n",
       "      <td>0.000535</td>\n",
       "      <td>1.256808e-03</td>\n",
       "      <td>-0.000140</td>\n",
       "      <td>0.000140</td>\n",
       "      <td>0.000681</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>D</td>\n",
       "      <td>-0.000067</td>\n",
       "      <td>0.000205</td>\n",
       "      <td>-1.110223e-16</td>\n",
       "      <td>0.000140</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.000408</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>interactions</td>\n",
       "      <td>-0.000071</td>\n",
       "      <td>0.000155</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>-0.000140</td>\n",
       "      <td>-0.000279</td>\n",
       "      <td>0.000136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>C</td>\n",
       "      <td>-0.000102</td>\n",
       "      <td>0.000203</td>\n",
       "      <td>-1.396453e-04</td>\n",
       "      <td>0.000140</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>-0.000408</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        feature  importance_mean  importance_std     val_imp_0  val_imp_1  \\\n",
       "5         names         0.232698        0.025097  2.480101e-01   0.193688   \n",
       "3             B         0.002641        0.001413  4.608295e-03   0.003212   \n",
       "2             A         0.000484        0.000535  1.256808e-03  -0.000140   \n",
       "1             D        -0.000067        0.000205 -1.110223e-16   0.000140   \n",
       "4  interactions        -0.000071        0.000155  0.000000e+00  -0.000140   \n",
       "0             C        -0.000102        0.000203 -1.396453e-04   0.000140   \n",
       "\n",
       "   val_imp_2  val_imp_3  \n",
       "5   0.229018   0.260076  \n",
       "3   0.000838   0.001906  \n",
       "2   0.000140   0.000681  \n",
       "1   0.000000  -0.000408  \n",
       "4  -0.000279   0.000136  \n",
       "0   0.000000  -0.000408  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from lightgbm import LGBMClassifier\n",
    "\n",
    "df = generate_test_data(1000, text=True)\n",
    "features = [\"A\", \"B\", \"C\", \"D\"]\n",
    "\n",
    "cv = CountVectorizer(ngram_range=(3, 3), analyzer=\"char\")\n",
    "feature_groups = dict()\n",
    "feature_groups[\"names\"] = cv.fit_transform(df[\"T\"])\n",
    "feature_groups[\"interactions\"] = df[[\"A\", \"B\"]].values*df[[\"C\", \"D\"]].values\n",
    "\n",
    "dataset = Dataset(df=df, target=\"binary_target\", features=features, feature_groups=feature_groups)\n",
    "\n",
    "lgbm = LGBMClassifier(random_state=0, n_jobs=1)\n",
    "\n",
    "lofo = LOFOImportance(dataset, model=lgbm, cv=4, scoring='roc_auc', n_jobs=4)\n",
    "\n",
    "importances = lofo.get_importance()\n",
    "importances"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  },
  "toc": {
   "colors": {
    "hover_highlight": "#DAA520",
    "navigate_num": "#000000",
    "navigate_text": "#333333",
    "running_highlight": "#FF0000",
    "selected_highlight": "#FFD700",
    "sidebar_border": "#EEEEEE",
    "wrapper_background": "#FFFFFF"
   },
   "moveMenuLeft": true,
   "nav_menu": {
    "height": "12px",
    "width": "252px"
   },
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 4,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false,
   "widenNotebook": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
